This short Markdown file shows how to query the WsprDaemon Timescale Database using R and SQL.

Setup packages

library(RPostgres)
library(DBI)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(DT)
library(knitr)
library(fuzzyjoin)
library(tmap)
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
data(World)
dt_options <- list(pageLength=7, scrollX = "400px")

Connect to the database

The settings are available over here.

db_name <- "wsprnet"
db_host <- "logs2.wsprdaemon.org"  
db_user <- "wdread"  
db_password <- "JTWSPR2008"
db_con <- dbConnect(RPostgres::Postgres(),
                    dbname = db_name,
                    host = db_host,
                    user = db_user,
                    password = db_password)  

Check if it worked:

dbListTables(db_con) 
## [1] "spots"

Yes!

Let’s try an example based on SQL from the WsprDaemon Timescale Databases Notes (Version 2.0 November 2020):

res <- dbSendQuery(db_con,
  "SELECT * FROM spots
   ORDER BY wd_time DESC
   LIMIT 20;")
dbFetch(res) %>%
  datatable(options = dt_options)

Yes – works.

dbClearResult(res)

Grab a time range

All transmisions

Let’s try one day’s worth of data – reception reports of M0INF. There’s a SQL quirk below. Variable names which aren’t all in lower case (such as CallSign) have to be referenced using double quotation marks; the quotes are escaped using the backslash. String literals like a callsign use single quotes.

one_day <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE \"CallSign\" = 'M0INF'
         AND wd_time >= '2021-01-03T00:00:00Z'
         AND wd_time < '2021-01-04T00:00:00Z';")

This fetches the data to a data frame:

the_dat <- dbFetch(one_day)
dbClearResult(one_day)
the_dat %>%
  datatable(options = dt_options)

Now plot:

the_dat %>%
  ggplot(aes(wd_time, distance)) +
  geom_point() +
  geom_smooth(method = "gam",
              formula = y ~ s(x, k = 20)) +
  labs(x = "Time", y = "Distance (km)",
       title = "WSPR reports of M0INF",
       subtitle = "3 Jan 2021, 20m band") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")

Both transmissions and reception reports

Let’s do the same again; however, now with reception reports both by and of M0INF.

one_day_send_receive <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
   AND wd_time >= '2021-01-03T00:00:00Z'
   AND wd_time <  '2021-01-04T00:00:00Z';")
the_dat_send_rec <- dbFetch(one_day_send_receive)
dbClearResult(one_day_send_receive)
the_dat_send_rec <- the_dat_send_rec %>%
  mutate(Direction = ifelse(Reporter == "M0INF",
                            "Rx by M0INF",
                            "Tx by M0INF"))

The graph is a bit crowded so the y-axis is on the \(\log_{10}\) scale.

the_dat_send_rec %>%
  ggplot(aes(wd_time, distance, colour = Direction)) +
  geom_point(alpha = 0.5, size = 1) +
  stat_smooth(geom="line",
              alpha = 0.5, size = 0.8,
              method = "gam",
              formula = y ~ s(x, k = 25)) +
  labs(x = "Time", y = "Distance (km)",
       title = "WSPR reports",
       subtitle = "3 Jan 2021, 20m band") +
  scale_y_continuous(trans = "log10") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
  theme(legend.position="bottom")

Which of those reports are over 5000 km?

the_dat_send_rec %>%
  filter(distance > 5000) %>%
  group_by(CallSign, Reporter, distance) %>%
  summarise(Spots = n()) %>%
  arrange(desc(Spots)) %>%
  kable()
## `summarise()` regrouping output by 'CallSign', 'Reporter' (override with `.groups` argument)
CallSign Reporter distance Spots
VK3MO M0INF 16880 7
M0INF KD2OM 5635 2
M0INF PT2FHC 8786 1

And which are under 100 km?

the_dat_send_rec %>%
  filter(distance < 100) %>%
  group_by(CallSign, Reporter, distance) %>%
  summarise(Spots = n()) %>%
  arrange(desc(Spots)) %>%
  kable()
## `summarise()` regrouping output by 'CallSign', 'Reporter' (override with `.groups` argument)
CallSign Reporter distance Spots
M0INF GB0SNB/SDR 32 58

Try two days (now there’s more data)

two_day_send_receive <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
   AND wd_time >= '2021-01-03T00:00:00Z'
   AND wd_time <  '2021-01-05T00:00:00Z';")
two_dat_send_rec <- dbFetch(two_day_send_receive)
dbClearResult(two_day_send_receive)
two_dat_send_rec <- two_dat_send_rec %>%
  mutate(Direction = ifelse(Reporter == "M0INF",
                            "Rx by M0INF",
                            "Tx by M0INF"))
two_dat_send_rec %>%
  ggplot(aes(wd_time, distance, colour = Direction)) +
  geom_point(alpha = 0.5, size = 1) +
  stat_smooth(geom="line",
              alpha = 0.5, size = 0.8,
              method = "gam",
              formula = y ~ s(x, k = 25)) +
  labs(x = "Time", y = "Distance (km)",
       title = "WSPR reports",
       subtitle = "3-4 Jan 2021, 20m band") +
  scale_y_continuous(trans = "log10") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
  theme(legend.position="bottom")

The red curve doesn’t make sense since there is missing data which should essentially be zero. For now, let’s remove it…

two_dat_send_rec %>%
  ggplot(aes(wd_time, distance, colour = Direction)) +
  geom_point(alpha = 0.5, size = 1) +
  labs(x = "Time", y = "Distance (km)",
       title = "WSPR reports",
       subtitle = "3-4 Jan 2021, 20m band") +
  scale_y_continuous(trans = "log10") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
  theme(legend.position="bottom")

Again on the 40 m band

day_40m_send_receive <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
   AND wd_time >= '2021-01-05T00:00:00Z'
   AND wd_time <  '2021-01-06T00:00:00Z';")
day_40m_send_receive_dat <- dbFetch(day_40m_send_receive)
dbClearResult(day_40m_send_receive)
day_40m_send_receive_dat <- day_40m_send_receive_dat %>%
  mutate(Direction = ifelse(Reporter == "M0INF",
                            "Rx by M0INF",
                            "Tx by M0INF"))
day_40m_send_receive_dat %>%
  ggplot(aes(wd_time, distance, colour = Direction)) +
  geom_point(alpha = 0.5, size = 1) +
  labs(x = "Time", y = "Distance (km)",
       title = "WSPR reports",
       subtitle = "5 Jan 2021, 40m band") +
  scale_y_continuous(trans = "log10") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
  theme(legend.position="bottom")

Track signal strength by reporter over time…

First, look at data where there’s more than one report.

dat_sent_many <- the_dat %>%
  group_by(Reporter) %>%
  summarise(spots = n()) %>%
  arrange(desc(spots)) %>%
  slice_head(n = 7)
## `summarise()` ungrouping output (override with `.groups` argument)
dat_sent_many %>%
  kable()
Reporter spots
GB0SNB/SDR 58
IW2NKE 22
YU/G8ZMF 10
SA2KHG 8
OE1WYC 7
IZ7VHF/RX 6
UA3245SWL 6
the_dat %>%
  mutate(ReportDist = paste0(Reporter, " (", distance, "km)")) %>%
  filter(Reporter %in% unique(dat_sent_many$Reporter)) %>%
  ggplot(aes(wd_time, dB, colour = ReportDist)) +
  geom_smooth(span = 1, se = FALSE) +
  labs(x = "Time", y = "Signal report (dB)",
       title = "WSPR reports (20m) – signal strength",
       colour = "Reporter") +
  scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Try again for latest 40m data:

one_day_40m <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE \"CallSign\" = 'M0INF'
         AND wd_time >= '2021-01-05T00:00:00Z'
         AND wd_time < '2021-01-06T00:00:00Z';")
dat_40m <- dbFetch(one_day_40m)
dbClearResult(one_day_40m)
dat_sent_many_40 <- dat_40m %>%
  group_by(Reporter) %>%
  summarise(spots = n()) %>%
  arrange(desc(spots)) %>%
  #filter(spots >= 5)
  slice_head(n = 7)
## `summarise()` ungrouping output (override with `.groups` argument)
dat_sent_many_40 %>%
  kable()
Reporter spots
OE9HLH 36
OE9GHV 33
HB9TMC 32
F4GFZ 22
DK8FT/A 21
IW2NKE 21
DK2DB 20
dat_40m %>%
  mutate(ReportDist = paste0(Reporter, " (", distance, "km)")) %>%
  filter(Reporter %in% unique(dat_sent_many_40$Reporter)) %>%
  ggplot(aes(wd_time, dB, colour = ReportDist)) +
    geom_smooth(span = 2, se = FALSE, size = 0.8) +
    geom_point(size = 0.6, alpha = 0.5) +
    labs(x = "Time", y = "dB",
         title = "WSPR reports (40m) – signal strength",
         subtitle = "A selection of reporters on 5 Jan 2021",
         colour = "Reporter") +
    scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Last hour sent/received from IO91

Jitter plot

Here are the WSPR frequencies, for labeling the plot in a moment.

wspr_bands <- tibble(
  MHz = c(
    0.136,
    0.4742,
    1.8366,
    3.5686,
    5.2872,
    5364.7,
    7.0386,
    10.1387,
    14.0956,
    18.1046,
    21.0946,
    24.9246,
    28.1246,
    50.293,
    70.091,
    144.489,
    432.300,
    1296.500
  ),
  m = 299.792458 / MHz,
  wavelength_text = ifelse(m >= 1,
                           paste(round(m, 0), "m"),
                           paste(round(m*100, 0), "cm"))
) %>%
  arrange(MHz)

wspr_bands %>% kable()
MHz m wavelength_text
0.1360 2204.3563088 2204 m
0.4742 632.2067862 632 m
1.8366 163.2323086 163 m
3.5686 84.0084229 84 m
5.2872 56.7015543 57 m
7.0386 42.5926261 43 m
10.1387 29.5691221 30 m
14.0956 21.2685134 21 m
18.1046 16.5589109 17 m
21.0946 14.2118105 14 m
24.9246 12.0279747 12 m
28.1246 10.6594390 11 m
50.2930 5.9609182 6 m
70.0910 4.2771891 4 m
144.4890 2.0748462 2 m
432.3000 0.6934824 69 cm
1296.5000 0.2312321 23 cm
5364.7000 0.0558824 6 cm

Grab the data:

last_whispers <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE (\"ReporterGrid\" LIKE 'IO91%'
          OR \"Grid\" LIKE 'IO91%')
   AND wd_time > NOW() - INTERVAL '1 hour';")
the_dat <- dbFetch(last_whispers)
dbClearResult(last_whispers)
nrow(the_dat)
## [1] 1972

Group into WSPR bands:

joined_dat <- the_dat %>%
  difference_left_join(wspr_bands,
                       by = "MHz",
                       max_dist = 0.2,
                       distance_col = "dist_band_start") %>%
  mutate(MHz.y = as.factor(MHz.y))

Check how many kHz off the match is (also look for NAs):

summary(joined_dat$dist_band_start*1000)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.390   1.504   1.556   1.677   1.582  79.074

Plot:

joined_dat %>%
  ggplot(aes(distance, MHz.y, colour = MHz.y)) +
  geom_jitter(height = 0.05, alpha = 1/3, size = 1) +
  xlab("Distance (km)") +
  ylab("Freq (MHz)") +
  labs(title = "WSPR spots IO91 (sent or received)",
       subtitle = paste0("From ", min(the_dat$wd_time),
                         " to ", max(the_dat$wd_time))) + 
  theme(legend.position = "none")

Try again, but this time separating the transmissions and receptions in IO91.

joined_dat <- joined_dat %>%
  mutate(direction = ifelse(grepl(
    pattern = "IO91",
    x = Grid,
    ignore.case = TRUE
  ), "Sent from IO91", "Received in IO91"))
joined_dat %>%
  ggplot(aes(distance, MHz.y, colour = direction)) +
  geom_point(position = position_jitterdodge(jitter.width = .1,
                                             jitter.height = 0,
                                             dodge.width = 0.4),
             
             alpha = 1/3, size = 1) +
  xlab("Distance (km)") +
  ylab("Freq (MHz)") +
  labs(title = "WSPR spots IO91 (sent or received)",
       subtitle = paste0("From ", min(the_dat$wd_time),
                         " to ", max(the_dat$wd_time)),
       colour = "Direction")

Polar plot

Let’s do the same again but also showing the direction, for transmissions from IO91 only.

tx_only <- joined_dat %>%
  filter(grepl(pattern = "IO91", x = Grid, ignore.case = TRUE))
linear_scale_polar <- tx_only %>%
  ggplot(aes(azimuth, distance, colour = MHz.y)) +
  geom_point(alpha = 0.5) +
  labs(
    title = "WSPR spots IO91",
    subtitle = paste0("From ", min(the_dat$wd_time),
                      " to ", max(the_dat$wd_time)),
    colour = "Freq (MHz)",
    x = NULL,
    y = "Distance"
  ) +
  scale_x_continuous(
    limits = c(0, 360),
    breaks = seq(0, 315, by = 45),
    minor_breaks = seq(0, 360, by = 15)
  ) +
  coord_polar()

linear_scale_polar

linear_scale_polar +
  scale_y_continuous(trans = "log10")
## Warning: Transformation introduced infinite values in continuous y-axis

tx_only %>%
  filter(distance > 8000) %>%
  select(CallSign, Reporter, distance, azimuth, MHz.x) %>%
  kable()
CallSign Reporter distance azimuth MHz.x
G4LRP DP0GVN-1 13544 183 14.09715
G1CPC PY2SDR 9515 223 14.09709

A bigger one - 48 hours of spots on all frequencies to/from IO91

IO91_48hrs <- dbSendQuery(db_con,
  "SELECT * FROM spots
   WHERE (\"ReporterGrid\" LIKE 'IO91%'
          OR \"Grid\" LIKE 'IO91%')
         AND wd_time >= '2021-01-05T00:00:00Z'
         AND wd_time < '2021-01-07T00:00:00Z';")
IO91_48hrs_dat <- dbFetch(IO91_48hrs)
dbClearResult(IO91_48hrs)
nrow(IO91_48hrs_dat)
## [1] 189067
IO91_48hrs_dat_bands <- IO91_48hrs_dat %>%
  difference_left_join(wspr_bands,
                       by = "MHz",
                       max_dist = 0.1,
                       distance_col = "dist_band_start") %>%
  mutate(
    MHz.y = as.factor(MHz.y),
    direction = ifelse(
      grepl(
        pattern = "IO91",
        x = Grid,
        ignore.case = TRUE
      ),
      "From IO91",
      "To IO91"
    )
  )
IO91_48hrs_dat_bands %>%
  filter(dist_band_start > 5/1000) %>%
  select(CallSign, Reporter, MHz.x, MHz.y, dist_band_start) %>%
  arrange(desc(dist_band_start))  %>%
  datatable(options = dt_options)
IO91_48hrs_dat_bands %>%
  filter(is.na(MHz.y)) %>%
  select(CallSign, Reporter, MHz.x, MHz.y, dist_band_start) %>%
  datatable(options = dt_options)
summary(IO91_48hrs_dat_bands$dist_band_start*1000)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.362   1.456   1.505   2.619   1.555  79.092       2
IO91_48hrs_dat_bands %>%
  filter(!is.na(MHz.y)) %>%
  ggplot(aes(x = distance, fill = MHz.y)) +
  geom_histogram(aes(y=..density..), binwidth = 500, position = "identity", alpha = 0.5) +
  facet_grid(rows = vars(direction)) +
  labs(x = "Distance (km)", y = "Mass", fill = "Freq (MHz)")

date_format <- "%d %b %Y (%H:%m)"
IO91_48hrs_dat_bands %>%
  filter(!is.na(MHz.y)) %>%
  ggplot(aes(x = distance, fill = MHz.y)) +
  geom_histogram(
    #aes(y = ..density..),
    binwidth = 200,
    position = "identity",
  ) +
  facet_grid(cols = vars(direction), rows = vars(MHz.y)) +
  labs(
    x = "Distance (km)",
    y = "Mass",
    fill = "Freq (MHz)",
    title = "WSPR reports to/from IO91 by band",
    subtitle = paste0(format(
      min(IO91_48hrs_dat_bands$wd_time), date_format
    ), " to ",
    format(
      max(IO91_48hrs_dat_bands$wd_time), date_format
    ))
  ) +
  theme(legend.position = "none")

Polar map

IO91_48hrs_dat_bands %>%
  filter(!is.na(MHz.y) & direction == "From IO91") %>%
  ggplot(aes(azimuth, y = distance)) +
  geom_point() +
  facet_grid(rows = vars(MHz.y)) +
  labs(
    x = "Distance (km)",
    y = "Mass",
    fill = "Freq (MHz)",
    title = "WSPR reports from IO91 by band",
    subtitle = paste0(format(
      min(IO91_48hrs_dat_bands$wd_time), date_format
    ), " to ",
    format(
      max(IO91_48hrs_dat_bands$wd_time), date_format
    ))
  ) +
  scale_x_continuous(
    limits = c(0, 360),
    breaks = seq(0, 315, by = 45),
    minor_breaks = seq(0, 360, by = 15)
  ) +
  coord_polar()

Plot on maps

IO91_48hrs_dat_bands_sf <- IO91_48hrs_dat_bands %>%
  st_as_sf(coords = c("wd_rx_lon", "wd_rx_lat"),
           crs = 4326)

This step takes up to two minutes, so I have cached it in the markdown. Hopefully it will go quickly…

t1 <- Sys.time()
IO91_48hrs_dat_bands_sf %>%
  filter(!is.na(MHz.y)) %>%
  ggplot() +
    geom_sf(data = World) +
    geom_sf(aes(colour = MHz.y)) +
    facet_grid(rows = vars(MHz.y)) +
    theme_classic() +
    labs(
      colour = "Freq (MHz)",
      title = "WSPR reports from IO91 by band",
      subtitle = paste0(format(
        min(IO91_48hrs_dat_bands$wd_time), date_format
      ), " to ",
      format(
        max(IO91_48hrs_dat_bands$wd_time), date_format
      ))
    ) +
    theme(legend.position = "none")

t2 <- Sys.time()
t2 - t1
## Time difference of 0.05691385 secs

Try fitting a mixture model

library(mixtools)
## mixtools package, version 1.2.0, Released 2020-02-05
## This package is based upon work supported by the National Science Foundation under Grant No. SES-0518772.
IO91_48hrs_dat_bands %>%
  group_by(MHz.y) %>%
  summarise(spots = n(),
            dist_p25 = quantile(distance,p=.25),
            dist_p75 = quantile(distance,p=.75)) %>%
  kable()
## `summarise()` ungrouping output (override with `.groups` argument)
MHz.y spots dist_p25 dist_p75
0.136 403 496.00 1035.00
0.4742 33873 357.00 867.00
1.8366 12115 217.00 825.00
3.5686 13935 253.00 720.00
5.2872 7845 480.00 928.00
7.0386 86851 644.00 1821.00
10.1387 15067 891.00 1926.00
14.0956 18020 513.00 2904.00
18.1046 155 2535.00 5234.50
21.0946 225 18.00 60.00
24.9246 112 18.00 18.00
28.1246 442 54.00 86.00
50.293 22 54.00 54.00
NA 2 3684.75 5014.25
dat_for_clust <- IO91_48hrs_dat_bands %>%
  filter(MHz.y == "14.0956" & direction == "From IO91")
nrow(dat_for_clust)
## [1] 13805
cluster_n = 6
start_means <- c(0, 1500, 6000, 10000, 12000, 17000)
#means <- c(NA, paste0(1:(cluster_n-1), "n"))
sds   <- c(NA, rep("b", cluster_n-1))


clust_res <- normalmixEM(dat_for_clust$distance,
                         mu = start_means,
                         #mean.constr = means,
                         sd.constr = sds,
                         k = cluster_n)
## number of iterations= 12
summary(clust_res)
## summary of normalmixEM object:
##          comp 1      comp 2      comp 3      comp 4      comp 5      comp 6
## lambda  0.23763    0.545196    0.194094 1.44116e-02 8.59046e-04 7.80888e-03
## mu     27.82031 1805.103140 5900.153689 9.17730e+03 1.16025e+04 1.37087e+04
## sigma  19.64022  690.233625  690.233625 6.90234e+02 6.90234e+02 6.90234e+02
## loglik at estimate:  -113114.3
#plot(clust_res, which = 2)
clusters <- clust_res$posterior %>%
  as.data.frame() %>%
  rowwise() %>%
  mutate(cluster = which.max(c_across()))

dat_for_clust$cluster <- factor(clusters$cluster)

dat_for_clust %>%
  ggplot(aes(x = distance, fill = cluster)) +
  geom_histogram(alpha = 0.5,
                 position = "identity",
                 binwidth = 500) +
  labs(x = "Distance (km)",
       y = "Count",
       fill = "Cluster")

clust_means <- dat_for_clust %>%
  group_by(cluster) %>%
  summarise(clust_mean_dist = mean(distance))
## `summarise()` ungrouping output (override with `.groups` argument)
dat_for_clust_sf <- dat_for_clust %>%
  left_join(clust_means) %>%
  st_as_sf(coords = c("wd_rx_lon", "wd_rx_lat"),
           crs = 4326)
## Joining, by = "cluster"
dat_for_clust_sf %>%
  ggplot() +
    geom_sf(data = World) +
    geom_sf(aes(colour = cluster)) +
    theme_classic() +
    labs(
      colour = "Cluster",
      title = "WSPR reports from IO91 (clusters)",
      subtitle = paste0(format(
        min(IO91_48hrs_dat_bands$wd_time), date_format
      ), " to ",
      format(
        max(IO91_48hrs_dat_bands$wd_time), date_format
      ))
    )

dat_for_clust_sf %>%
  ggplot(aes(x = wd_time, y = dB, colour = ordered(round(clust_mean_dist,0)))) +
  #geom_smooth(span = 3, se = FALSE, size = 2) 
  geom_jitter(size = 2, alpha = 0.5) +
  labs(colour = "Mean cluster dist (km)", x = NULL)

db plots again - the easy way…